library(Seurat)Attaching SeuratObject
library(cowplot)
library(ggplot2)library(Seurat)Attaching SeuratObject
library(cowplot)
library(ggplot2)dataDirs <- list(
day60_old = list(
cellRanger = "/srv/gstore4users/p29781/o29804_CellRangerCount_2022-11-04--11-20-44/day60oticdiff_Library_418886_1",
seuratReport = "/srv/gstore4users/p29781/o29804_ScSeurat_2022-11-08--10-01-51/day60oticdiff_Library_418886_1_SCReport",
labeledObject = "/srv/gstore4users/p29781/additional-analyses/2022-11-28-relabel-day60"
),
day60_new = list(
cellRanger = "/srv/gstore4users/p29781/o30306_CellRangerCount_2022-12-20--11-08-02/IEOday60_controlprotocol",
scData = "/srv/gstore4users/p29781/o30306_ScSeurat_2022-12-20--22-03-42/IEOday60_controlprotocol_SCReport")
)
dataDirs$day60_old
$day60_old$cellRanger
[1] "/srv/gstore4users/p29781/o29804_CellRangerCount_2022-11-04--11-20-44/day60oticdiff_Library_418886_1"
$day60_old$seuratReport
[1] "/srv/gstore4users/p29781/o29804_ScSeurat_2022-11-08--10-01-51/day60oticdiff_Library_418886_1_SCReport"
$day60_old$labeledObject
[1] "/srv/gstore4users/p29781/additional-analyses/2022-11-28-relabel-day60"
$day60_new
$day60_new$cellRanger
[1] "/srv/gstore4users/p29781/o30306_CellRangerCount_2022-12-20--11-08-02/IEOday60_controlprotocol"
$day60_new$scData
[1] "/srv/gstore4users/p29781/o30306_ScSeurat_2022-12-20--22-03-42/IEOday60_controlprotocol_SCReport"
resultDir <- "/scratch/jcarreno/sta426_project/results"First we load the data from the server and we identify the source of each dataset
oldAnalysis <- readRDS(file = paste(dataDirs$day60_old$seuratReport, "/scData.rds",sep = ""))
newAnalysis <- readRDS(file = paste(dataDirs$day60_new$scData, "/scData.rds",sep = ""))oldAnalysis$experiment <- "OLD"
newAnalysis$experiment <- "NEW"Now, we can merge the datasets using FindIntegrationAnchors() and IntegrateData() from Seurat package.
anchors <- FindIntegrationAnchors(object.list = list(oldAnalysis, newAnalysis), dims = 1:20)combined <- IntegrateData(anchorset = anchors, dims = 1:20)combined <- readRDS(file = "combinedSeurat.rds")We can perform now basic visual inspection to check if the integration has been done properly
DefaultAssay(combined) <- "integrated"# Run the standard workflow for visualization and clustering
combined <- ScaleData(combined, verbose = FALSE)
combined <- RunPCA(combined, npcs = 30, verbose = FALSE)
ElbowPlot(combined)combined <- RunUMAP(combined, reduction = "pca", dims = 1:20)Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
18:12:01 UMAP embedding parameters a = 0.9922 b = 1.112
18:12:01 Read 12137 rows and found 20 numeric columns
18:12:01 Using Annoy for neighbor search, n_neighbors = 30
18:12:01 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
18:12:03 Writing NN index file to temp file /tmp/Rtmp5P1IDU/file653b717a9c48
18:12:03 Searching Annoy index using 1 thread, search_k = 3000
18:12:08 Annoy recall = 100%
18:12:08 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
18:12:10 Initializing from normalized Laplacian + noise (using irlba)
18:12:10 Commencing optimization for 200 epochs, with 494078 positive edges
18:12:17 Optimization finished
combined <- FindNeighbors(combined, reduction = "pca", dims = 1:20, k.param = 9)Computing nearest neighbor graph
Computing SNN
combined <- FindClusters(combined, resolution = 0.5)Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12137
Number of edges: 138573
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9188
Number of communities: 21
Elapsed time: 0 seconds
p1 <- DimPlot(combined, reduction = "umap", group.by = "experiment")
p2 <- DimPlot(combined, reduction = "umap", label = TRUE)
plot_grid(p1, p2)DimPlot(combined, reduction = "umap", split.by = "experiment")Save the combined data
#saveRDS(combined, file = "combinedSeurat.rds")
#combined <- readRDS(file = "combinedSeurat.rds")
DefaultAssay(combined) <- "RNA"
combined <- ScaleData(combined, verbose = FALSE)Note: MYO7A is a gene used by Steinhard et al. However, it does not appear in our analysis as it has likely been discarded due to its low presence during QC or during the merging of both datasets.
ghc <- c('ATOH1',
'MYO7A',
'MYO6',
'PCP4',
'ANXA4',
'GFI1',
'ESPN',
'TMPRSS3',
'BDNF',
'CCER2',
'GNG8',
'POU4F3',
'OTOF',
'FSIP1',
'ZBBX',
'SKOR1',
'DNAH5',
'SCL26A5',
'GATA3',
'LMOD3',
'FGF8',
'INSM1',
'DNM3'
)for (marker in ghc){
tryCatch({print(FeaturePlot(combined, reduction = "umap", features = marker, order= TRUE))},
error=function(e){print("ERROR")},
warning=function(w){print(paste(marker, "not present"))})
}[1] "SCL26A5 not present"
Show subset of clusters (small clusters, those that are likely to be of interest)
DoHeatmap(combined, features = ghc, cells = WhichCells(combined, idents = c("8","9","10","11","12","13","14","15","16","17","18","19","20")))Warning in DoHeatmap(combined, features = ghc, cells = WhichCells(combined, :
The following features were omitted as they were not found in the scale.data
slot for the RNA assay: SCL26A5
Number of cells per cluster:
cellCountperCluster <- data.frame(id = Idents(combined))
barplot = ggplot(data=cellCountperCluster, aes(x=id)) +
geom_bar() +
geom_text(stat='count', aes(label=..count..), vjust=-1) +
theme_minimal()
barplot + labs(x="Cluster", y = "Cell count")Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(count)` instead.
DotPlot(combined, features=ghc) + coord_flip() + theme(axis.text.x = element_text(size = 8)) Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
The following requested variables were not found: SCL26A5
for (marker in ghc){
tryCatch({print(print(VlnPlot(combined, marker)))},
error=function(e){print("ERROR")},
warning=function(w){print(paste(marker, "not present"))})
}[1] "ERROR"
cells.use <- WhichCells(combined, idents = '20')
DimPlot(combined, reduction = "umap", group.by = "ident", cells.highlight = cells.use, sizes.highlight = 0.3) + NoLegend()Cluster 20 is likely to be HC cluster
oep <- c(
'EPCAM',
'CDH1',
'SOX2',
'SIX1',
'OC90',
'SOX10',
'FBXO2',
'LMX1A',
'PAX2',
'PAX8',
'DLX5',
'GBX2',
'JAG1',
'TBX2',
'COL9A2',
'OTOA',
'MYO5C',
'OTOL1',
'USH1C',
'PCDH9')for (marker in oep){
tryCatch({print(FeaturePlot(combined, reduction = "umap", features = marker, order= TRUE))},
error=function(e){print("ERROR")},
warning=function(w){print(paste(marker, "not present"))})
}DoHeatmap(combined, features = ghc)Warning in DoHeatmap(combined, features = ghc): The following features were
omitted as they were not found in the scale.data slot for the RNA assay: SCL26A5
DotPlot(combined, features=oep) + coord_flip() + theme(axis.text.x = element_text(size = 8)) for (marker in oep){
tryCatch({print(VlnPlot(combined, marker))},
error=function(e){print("ERROR")},
warning=function(w){print(paste(marker, "not present"))})
}Cluster 13 is likely to be OEP cells
hc_stei <- c(
"ATOH1",
"MYO7A",
"OTOF",
"STRC",
"ESPN",
"GPX2",
"PCDH15",
"CDH23",
"USH2A",
"POU4F3",
"CABP2",
"GFI1",
"USH1C",
"RIPOR2",
"MYO6",
"MYO15A",
"CIB2",
"PCP4",
"CALM2",
"LHFPL5"
)oep_stei <- c(
"PAX8",
"PAX2",
"OC90",
"HMX3",
"DLX3",
"DLX5",
"SALL4",
"DUSP6",
"SPRY2",
"SIX1",
"EYA1",
"FOXG1",
"LMX1A",
"OTOA",
"APOE",
"SMOC2",
"SPARCL1",
"FBXO2",
"COL11A1",
"COL9A2"
)VlnPlot(combined, hc_stei, idents = c("20", "13"))VlnPlot(combined, oep_stei, idents = c("20", "13"))